import datetime
now = datetime.datetime.now()
print ("Ăltima versiĂłn:")
print (now.strftime("%Y-%m-%d %H:%M:%S"))
Ăltima versiĂłn: 2021-11-13 18:55:08
import pandas as pd
import numpy as np
import pandas_profiling
from itertools import combinations
from dateutil.parser import parse
import matplotlib as mpl
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import plotly.express as px
#datos con el ĂĄrea de las secciones (estimacion feb-2020, con la cartografĂa de las manzanas)
data_area = pd.read_csv("Datos_secc_area_2019_amanzanamiento.csv", dtype={'Area_Km2':float})
#datos con el ĂĄrea de las secciones (estimacion feb-2020, considerando las ĂĄreas totales)
data_area2 = pd.read_csv("Datos_secc_area_2019_total.csv", dtype={'Area_Km2':float})
#series de tiempo de LNE y PE por seccion (enero 2019 - diciembre 2019)
data_lne = pd.read_csv("Datos_secc_PE-LNE_2019.csv", parse_dates=['DATE'], dtype={'PE':float, 'LNE':float})
#datos con el tipo de secciĂłn (clasificaciĂłn de la Direccion de Cartografia)
data_tipo_secc = pd.read_csv('Datos_secc_tipo_2019.txt', sep="|")
#nomenclatura de los estados
data_nom_edo = pd.read_csv('Datos_Nomenclatura_EDO.csv', encoding='utf-8')
#datos con los nombres de los municipios
data_nom_mun = pd.read_csv('Datos_Nomenclatura_MUN.csv', encoding='utf-8')
print(data_tipo_secc.dtypes)
data_tipo_secc
EDO int64 DTO int64 MUN int64 SECC int64 TIPO_SECCION object dtype: object
| EDO | DTO | MUN | SECC | TIPO_SECCION | |
|---|---|---|---|---|---|
| 0 | 1 | 1 | 2 | 338 | M |
| 1 | 1 | 1 | 2 | 339 | M |
| 2 | 1 | 1 | 2 | 340 | R |
| 3 | 1 | 1 | 2 | 341 | M |
| 4 | 1 | 1 | 2 | 342 | R |
| ... | ... | ... | ... | ... | ... |
| 68431 | 32 | 4 | 57 | 530 | M |
| 68432 | 32 | 4 | 57 | 531 | U |
| 68433 | 32 | 4 | 57 | 532 | U |
| 68434 | 32 | 4 | 57 | 533 | U |
| 68435 | 32 | 4 | 57 | 548 | R |
68436 rows Ă 5 columns
print(data_area.dtypes)
data_area
EDO int64 DTO int64 MUN int64 SECC int64 Area_Km2 float64 dtype: object
| EDO | DTO | MUN | SECC | Area_Km2 | |
|---|---|---|---|---|---|
| 0 | 1 | 1 | 2 | 338 | 0.506492 |
| 1 | 1 | 1 | 2 | 339 | 0.692053 |
| 2 | 1 | 1 | 2 | 340 | 0.990536 |
| 3 | 1 | 1 | 2 | 341 | 0.507441 |
| 4 | 1 | 1 | 2 | 342 | 1.437017 |
| ... | ... | ... | ... | ... | ... |
| 61035 | 32 | 4 | 57 | 529 | 0.545010 |
| 61036 | 32 | 4 | 57 | 530 | 0.757383 |
| 61037 | 32 | 4 | 57 | 531 | 0.548031 |
| 61038 | 32 | 4 | 57 | 532 | 0.149622 |
| 61039 | 32 | 4 | 57 | 533 | 0.559116 |
61040 rows Ă 5 columns
print(data_area2.dtypes)
data_area2
EDO int64 DTO int64 MUN int64 SECC int64 Area_Km2 float64 dtype: object
| EDO | DTO | MUN | SECC | Area_Km2 | |
|---|---|---|---|---|---|
| 0 | 2 | 2 | 2 | 264 | 1.304 |
| 1 | 2 | 3 | 1 | 73 | 0.199 |
| 2 | 2 | 2 | 2 | 266 | 0.154 |
| 3 | 2 | 3 | 1 | 77 | 0.328 |
| 4 | 2 | 3 | 1 | 78 | 0.164 |
| ... | ... | ... | ... | ... | ... |
| 68359 | 15 | 36 | 106 | 5151 | 54.406 |
| 68360 | 12 | 5 | 71 | 2670 | 90.644 |
| 68361 | 12 | 5 | 44 | 1736 | 67.699 |
| 68362 | 15 | 1 | 103 | 4728 | 19.654 |
| 68363 | 16 | 4 | 53 | 940 | 16.830 |
68364 rows Ă 5 columns
print(data_lne.dtypes)
data_lne
DATE datetime64[ns] EDO int64 DTO int64 MUN int64 SECC int64 PE float64 LNE float64 dtype: object
| DATE | EDO | DTO | MUN | SECC | PE | LNE | |
|---|---|---|---|---|---|---|---|
| 0 | 2019-01-01 | 1 | 3 | 1 | 1 | 2163.0 | 2128.0 |
| 1 | 2019-01-01 | 1 | 3 | 1 | 2 | 903.0 | 891.0 |
| 2 | 2019-01-01 | 1 | 3 | 1 | 3 | 1710.0 | 1689.0 |
| 3 | 2019-01-01 | 1 | 3 | 1 | 4 | 1413.0 | 1397.0 |
| 4 | 2019-01-01 | 1 | 3 | 1 | 5 | 788.0 | 782.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 821125 | 2019-12-01 | 32 | 4 | 57 | 530 | 1671.0 | 1649.0 |
| 821126 | 2019-12-01 | 32 | 4 | 57 | 531 | 1601.0 | 1581.0 |
| 821127 | 2019-12-01 | 32 | 4 | 57 | 532 | 1081.0 | 1065.0 |
| 821128 | 2019-12-01 | 32 | 4 | 57 | 533 | 1183.0 | 1164.0 |
| 821129 | 2019-12-01 | 32 | 4 | 57 | 548 | 119.0 | 117.0 |
821130 rows Ă 7 columns
print(data_nom_edo.dtypes)
print(data_nom_mun.dtypes)
EDO int64 NOMBRE_ESTADO object dtype: object EDO int64 MUN int64 NOMBRE_MUNICIPIO object dtype: object
#ver si hay valores nulos en las series de tiempo data_lne
data_lne.isnull().sum()
DATE 0 EDO 0 DTO 0 MUN 0 SECC 0 PE 0 LNE 0 dtype: int64
#ver si hay valores nulos
data_area.isnull().sum()
EDO 0 DTO 0 MUN 0 SECC 0 Area_Km2 0 dtype: int64
#ver valores unicos
data_area.groupby(['EDO','DTO','MUN','SECC']).ngroups
61040
#ver si hay valores nulos
data_area2.isnull().sum()
EDO 0 DTO 0 MUN 0 SECC 0 Area_Km2 0 dtype: int64
#ver valores unicos
data_area2.groupby(['EDO','DTO','MUN','SECC']).ngroups
68364
#ver si hay valores nulos
data_tipo_secc.isnull().sum()
EDO 0 DTO 0 MUN 0 SECC 0 TIPO_SECCION 0 dtype: int64
#se eliminan las filas con valores vacĂos
#data_lne.dropna()
#Se agrupan los datos por secciones
#se agrupa con las columnas Date, EDO, MUN
datos_lne = data_lne.groupby(['DATE','EDO','DTO','MUN','SECC']).agg(
{
# find sum of PE
'PE': "sum",
# find sum of LNE
'LNE': "sum"
}
)
#se resetea el index
datos_lne = datos_lne.reset_index()
datos_lne = datos_lne.groupby(['EDO','DTO','MUN','SECC'])
#se imprimen los primeros diez valores
datos_lne.head(10)
| DATE | EDO | DTO | MUN | SECC | PE | LNE | |
|---|---|---|---|---|---|---|---|
| 0 | 2019-01-01 | 1 | 1 | 2 | 338 | 1803.0 | 1790.0 |
| 1 | 2019-01-01 | 1 | 1 | 2 | 339 | 1713.0 | 1693.0 |
| 2 | 2019-01-01 | 1 | 1 | 2 | 340 | 1741.0 | 1717.0 |
| 3 | 2019-01-01 | 1 | 1 | 2 | 341 | 2059.0 | 2036.0 |
| 4 | 2019-01-01 | 1 | 1 | 2 | 342 | 1553.0 | 1513.0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 684269 | 2019-10-01 | 32 | 4 | 57 | 530 | 1671.0 | 1660.0 |
| 684270 | 2019-10-01 | 32 | 4 | 57 | 531 | 1592.0 | 1582.0 |
| 684271 | 2019-10-01 | 32 | 4 | 57 | 532 | 1079.0 | 1068.0 |
| 684272 | 2019-10-01 | 32 | 4 | 57 | 533 | 1185.0 | 1170.0 |
| 684273 | 2019-10-01 | 32 | 4 | 57 | 548 | 119.0 | 119.0 |
684274 rows Ă 7 columns
#datos de enero de todas los secciones
enero=datos_lne.first() #first para tomar los primeros valores correspondientes a enero
enero2019=enero.drop(['DATE'], axis=1) #eliminamos DATE
enero_2019 = enero2019.reset_index() #recuperamos el Ăndice
enero_2019
| EDO | DTO | MUN | SECC | PE | LNE | |
|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 2 | 338 | 1803.0 | 1790.0 |
| 1 | 1 | 1 | 2 | 339 | 1713.0 | 1693.0 |
| 2 | 1 | 1 | 2 | 340 | 1741.0 | 1717.0 |
| 3 | 1 | 1 | 2 | 341 | 2059.0 | 2036.0 |
| 4 | 1 | 1 | 2 | 342 | 1553.0 | 1513.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 68423 | 32 | 4 | 57 | 530 | 1608.0 | 1586.0 |
| 68424 | 32 | 4 | 57 | 531 | 1542.0 | 1525.0 |
| 68425 | 32 | 4 | 57 | 532 | 1060.0 | 1042.0 |
| 68426 | 32 | 4 | 57 | 533 | 1123.0 | 1106.0 |
| 68427 | 32 | 4 | 57 | 548 | 116.0 | 114.0 |
68428 rows Ă 6 columns
#datos de diciembre de todas los secciones
diciembre=datos_lne.last()
diciembre2019=diciembre.drop(['DATE'], axis=1)
diciembre_2019 = diciembre2019.reset_index() #recuperamos el Ăndice
diciembre_2019
| EDO | DTO | MUN | SECC | PE | LNE | |
|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 2 | 338 | 1786.0 | 1770.0 |
| 1 | 1 | 1 | 2 | 339 | 1724.0 | 1707.0 |
| 2 | 1 | 1 | 2 | 340 | 1771.0 | 1751.0 |
| 3 | 1 | 1 | 2 | 341 | 2074.0 | 2041.0 |
| 4 | 1 | 1 | 2 | 342 | 1559.0 | 1520.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 68423 | 32 | 4 | 57 | 530 | 1671.0 | 1649.0 |
| 68424 | 32 | 4 | 57 | 531 | 1601.0 | 1581.0 |
| 68425 | 32 | 4 | 57 | 532 | 1081.0 | 1065.0 |
| 68426 | 32 | 4 | 57 | 533 | 1183.0 | 1164.0 |
| 68427 | 32 | 4 | 57 | 548 | 119.0 | 117.0 |
68428 rows Ă 6 columns
Procesamiento datos årea urbanizada y total (para las secciones sin amanzanamiento)¶
#se filtran y seleccionan de data_area2 las secciones que no tiene dato de amanzanamiento
data_area3 = (
data_area2.merge(data_area,
on=['EDO','DTO','MUN','SECC'],
how='left',
indicator=True)
.query('_merge == "left_only"')
.rename(columns={'Area_Km2_x':'Area_Km2'})
.drop(columns=['_merge', 'Area_Km2_y'])
.reset_index()
.drop(columns='index')
)
data_area3
| EDO | DTO | MUN | SECC | Area_Km2 | |
|---|---|---|---|---|---|
| 0 | 2 | 7 | 2 | 661 | 84.616 |
| 1 | 2 | 3 | 1 | 184 | 1069.294 |
| 2 | 2 | 1 | 2 | 587 | 8.029 |
| 3 | 2 | 7 | 1 | 170 | 2670.323 |
| 4 | 2 | 3 | 1 | 200 | 5647.802 |
| ... | ... | ... | ... | ... | ... |
| 7390 | 4 | 1 | 6 | 393 | 401.257 |
| 7391 | 4 | 1 | 11 | 427 | 606.415 |
| 7392 | 4 | 2 | 3 | 284 | 654.449 |
| 7393 | 4 | 2 | 7 | 432 | 19.803 |
| 7394 | 15 | 36 | 118 | 5821 | 20.655 |
7395 rows Ă 5 columns
#ver valores unicos
data_area3.groupby(['EDO','DTO','MUN','SECC']).ngroups
7395
data_area_todas = pd.concat([data_area, data_area3], axis=0).reset_index().drop(columns='index')
data_area_todas
| EDO | DTO | MUN | SECC | Area_Km2 | |
|---|---|---|---|---|---|
| 0 | 1 | 1 | 2 | 338 | 0.506492 |
| 1 | 1 | 1 | 2 | 339 | 0.692053 |
| 2 | 1 | 1 | 2 | 340 | 0.990536 |
| 3 | 1 | 1 | 2 | 341 | 0.507441 |
| 4 | 1 | 1 | 2 | 342 | 1.437017 |
| ... | ... | ... | ... | ... | ... |
| 68430 | 4 | 1 | 6 | 393 | 401.257000 |
| 68431 | 4 | 1 | 11 | 427 | 606.415000 |
| 68432 | 4 | 2 | 3 | 284 | 654.449000 |
| 68433 | 4 | 2 | 7 | 432 | 19.803000 |
| 68434 | 15 | 36 | 118 | 5821 | 20.655000 |
68435 rows Ă 5 columns
#ver valores unicos
data_area_todas.groupby(['EDO','DTO','MUN','SECC']).ngroups
68435
#Se agrupan los datos por secciones
#se agrupa con las columnas Date, EDO, MUN, etc
data_area = data_area_todas.groupby(['EDO','DTO','MUN','SECC']).agg(
{
# find sum of Area_Km2
'Area_Km2': "sum"
}
)
#se resetea el index
data_area = data_area.reset_index()
data_area
| EDO | DTO | MUN | SECC | Area_Km2 | |
|---|---|---|---|---|---|
| 0 | 1 | 1 | 2 | 338 | 0.506492 |
| 1 | 1 | 1 | 2 | 339 | 0.692053 |
| 2 | 1 | 1 | 2 | 340 | 0.990536 |
| 3 | 1 | 1 | 2 | 341 | 0.507441 |
| 4 | 1 | 1 | 2 | 342 | 1.437017 |
| ... | ... | ... | ... | ... | ... |
| 68430 | 32 | 4 | 57 | 530 | 0.757383 |
| 68431 | 32 | 4 | 57 | 531 | 0.548031 |
| 68432 | 32 | 4 | 57 | 532 | 0.149622 |
| 68433 | 32 | 4 | 57 | 533 | 0.559116 |
| 68434 | 32 | 4 | 57 | 548 | 22.948000 |
68435 rows Ă 5 columns
#data_tipo_secc=data_tipo_secc.groupby(['EDO','MUN'])['TIPO_SECCION'].apply(lambda x: (x=='R').sum()).reset_index(name='NUM_SECC_RUR')
#data_tipo_secc
A partir de las bases de datos, se procesan y construyen los siete indicadores base:
#calculamos los coeficientes de variacion del PE y la LNE para todas las secciones
datos_vc = datos_lne.agg({'PE': lambda x: np.std(x)/np.mean(x),'LNE': lambda x: np.std(x)/np.mean(x)}).rename(columns={'PE': 'Coef_Var_PE', 'LNE': 'Coef_Var_LNE'})
datos_vc
| Coef_Var_PE | Coef_Var_LNE | ||||
|---|---|---|---|---|---|
| EDO | DTO | MUN | SECC | ||
| 1 | 1 | 2 | 338 | 0.009274 | 0.014711 |
| 339 | 0.006067 | 0.010667 | |||
| 340 | 0.007424 | 0.013348 | |||
| 341 | 0.006535 | 0.012118 | |||
| 342 | 0.013673 | 0.020742 | |||
| ... | ... | ... | ... | ... | ... |
| 32 | 4 | 57 | 530 | 0.013450 | 0.014736 |
| 531 | 0.012040 | 0.012231 | |||
| 532 | 0.006251 | 0.008482 | |||
| 533 | 0.018088 | 0.018581 | |||
| 548 | 0.013194 | 0.016949 |
68428 rows Ă 2 columns
#calculamos los coeficientes de autocorrelacion (lag=1month) para todos los secciones
datos_autcorr = datos_lne.agg({'PE': lambda x: x.autocorr(lag=1),'LNE': lambda x: x.autocorr(lag=1)}).rename(columns={'PE': 'Autocorr_PE', 'LNE': 'Autocorr_LNE'})
datos_autcorr
| Autocorr_PE | Autocorr_LNE | ||||
|---|---|---|---|---|---|
| EDO | DTO | MUN | SECC | ||
| 1 | 1 | 2 | 338 | 0.689191 | 0.581325 |
| 339 | 0.717164 | 0.158740 | |||
| 340 | 0.827905 | 0.152959 | |||
| 341 | 0.635793 | 0.435180 | |||
| 342 | 0.634410 | 0.367652 | |||
| ... | ... | ... | ... | ... | ... |
| 32 | 4 | 57 | 530 | 0.955188 | 0.952950 |
| 531 | 0.974564 | 0.961922 | |||
| 532 | 0.934510 | 0.681005 | |||
| 533 | 0.985049 | 0.928739 | |||
| 548 | 0.598121 | 0.835766 |
68428 rows Ă 2 columns
#calculamos la razĂłn media de LNE/PE para todas los secciones
data_lne['Razon_LNE_PE'] = data_lne['LNE']/data_lne['PE'] #calculamos la razĂłn para todos los secciones y meses
datos_ratio=data_lne.groupby(['EDO','DTO','MUN','SECC'])['Razon_LNE_PE'].mean()
datos_ratio
EDO DTO MUN SECC
1 1 2 338 0.991580
339 0.990970
340 0.989805
341 0.987758
342 0.984600
...
32 4 57 530 0.988422
531 0.989747
532 0.986724
533 0.986229
548 0.990182
Name: Razon_LNE_PE, Length: 68428, dtype: float64
#calculamos la tasa de crecimiento de la LNE entre enero2019 y diciembre2019
diciembre_2019['TC_LNE_2019'] = (diciembre_2019['LNE'] - enero_2019['LNE'])/enero_2019['LNE']
diciembre_2019
| EDO | DTO | MUN | SECC | PE | LNE | TC_LNE_2019 | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 2 | 338 | 1786.0 | 1770.0 | -0.011173 |
| 1 | 1 | 1 | 2 | 339 | 1724.0 | 1707.0 | 0.008269 |
| 2 | 1 | 1 | 2 | 340 | 1771.0 | 1751.0 | 0.019802 |
| 3 | 1 | 1 | 2 | 341 | 2074.0 | 2041.0 | 0.002456 |
| 4 | 1 | 1 | 2 | 342 | 1559.0 | 1520.0 | 0.004627 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 68423 | 32 | 4 | 57 | 530 | 1671.0 | 1649.0 | 0.039723 |
| 68424 | 32 | 4 | 57 | 531 | 1601.0 | 1581.0 | 0.036721 |
| 68425 | 32 | 4 | 57 | 532 | 1081.0 | 1065.0 | 0.022073 |
| 68426 | 32 | 4 | 57 | 533 | 1183.0 | 1164.0 | 0.052441 |
| 68427 | 32 | 4 | 57 | 548 | 119.0 | 117.0 | 0.026316 |
68428 rows Ă 7 columns
#FunciĂłn para calcular PV (Variacion Proporcional)
def PVar(lista):
n = len(lista)
CC= 2/(n*(n-1))
sumaD = sum([(abs(i-j)/max(i,j)) for i,j in list(combinations(lista, 2))])
return CC*sumaD
#ejemplo
array = np.arange(20).reshape(4,5)
print(array)
lista= array[:,1]
lista
[[ 0 1 2 3 4] [ 5 6 7 8 9] [10 11 12 13 14] [15 16 17 18 19]]
array([ 1, 6, 11, 16])
PVar(lista)
0.6786616161616161
#ejemplo
a = pd.DataFrame(array)
print(a)
print(list(a.columns))
0 1 2 3 4 0 0 1 2 3 4 1 5 6 7 8 9 2 10 11 12 13 14 3 15 16 17 18 19 [0, 1, 2, 3, 4]
a[1]
0 1 1 6 2 11 3 16 Name: 1, dtype: int32
a.agg({1: lambda x: PVar(x)})
1 0.678662 dtype: float64
#calculamos la proportional variability para cada seccion
datos_PV = datos_lne.agg({'PE': lambda x: PVar(x),'LNE': lambda x: PVar(x)}).rename(columns={'PE': 'Var_Prop_PE', 'LNE': 'Var_Prop_LNE'})
datos_PV
| Var_Prop_PE | Var_Prop_LNE | ||||
|---|---|---|---|---|---|
| EDO | DTO | MUN | SECC | ||
| 1 | 1 | 2 | 338 | 0.011123 | 0.017569 |
| 339 | 0.007416 | 0.010046 | |||
| 340 | 0.008319 | 0.013217 | |||
| 341 | 0.007678 | 0.014344 | |||
| 342 | 0.016305 | 0.024695 | |||
| ... | ... | ... | ... | ... | ... |
| 32 | 4 | 57 | 530 | 0.015733 | 0.017848 |
| 531 | 0.014507 | 0.014973 | |||
| 532 | 0.007321 | 0.010120 | |||
| 533 | 0.021717 | 0.022744 | |||
| 548 | 0.014626 | 0.020291 |
68428 rows Ă 2 columns
#se unen coeficiente de variaciĂłn y proportional variability
datos1 = datos_vc.join(datos_PV, lsuffix='_caller', rsuffix='_other')
datos1
| Coef_Var_PE | Coef_Var_LNE | Var_Prop_PE | Var_Prop_LNE | ||||
|---|---|---|---|---|---|---|---|
| EDO | DTO | MUN | SECC | ||||
| 1 | 1 | 2 | 338 | 0.009274 | 0.014711 | 0.011123 | 0.017569 |
| 339 | 0.006067 | 0.010667 | 0.007416 | 0.010046 | |||
| 340 | 0.007424 | 0.013348 | 0.008319 | 0.013217 | |||
| 341 | 0.006535 | 0.012118 | 0.007678 | 0.014344 | |||
| 342 | 0.013673 | 0.020742 | 0.016305 | 0.024695 | |||
| ... | ... | ... | ... | ... | ... | ... | ... |
| 32 | 4 | 57 | 530 | 0.013450 | 0.014736 | 0.015733 | 0.017848 |
| 531 | 0.012040 | 0.012231 | 0.014507 | 0.014973 | |||
| 532 | 0.006251 | 0.008482 | 0.007321 | 0.010120 | |||
| 533 | 0.018088 | 0.018581 | 0.021717 | 0.022744 | |||
| 548 | 0.013194 | 0.016949 | 0.014626 | 0.020291 |
68428 rows Ă 4 columns
#se agrega autocorrelacion
datos2019_secc = datos1.join(datos_autcorr, lsuffix='_caller', rsuffix='_other')
datos2019_secc
| Coef_Var_PE | Coef_Var_LNE | Var_Prop_PE | Var_Prop_LNE | Autocorr_PE | Autocorr_LNE | ||||
|---|---|---|---|---|---|---|---|---|---|
| EDO | DTO | MUN | SECC | ||||||
| 1 | 1 | 2 | 338 | 0.009274 | 0.014711 | 0.011123 | 0.017569 | 0.689191 | 0.581325 |
| 339 | 0.006067 | 0.010667 | 0.007416 | 0.010046 | 0.717164 | 0.158740 | |||
| 340 | 0.007424 | 0.013348 | 0.008319 | 0.013217 | 0.827905 | 0.152959 | |||
| 341 | 0.006535 | 0.012118 | 0.007678 | 0.014344 | 0.635793 | 0.435180 | |||
| 342 | 0.013673 | 0.020742 | 0.016305 | 0.024695 | 0.634410 | 0.367652 | |||
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 32 | 4 | 57 | 530 | 0.013450 | 0.014736 | 0.015733 | 0.017848 | 0.955188 | 0.952950 |
| 531 | 0.012040 | 0.012231 | 0.014507 | 0.014973 | 0.974564 | 0.961922 | |||
| 532 | 0.006251 | 0.008482 | 0.007321 | 0.010120 | 0.934510 | 0.681005 | |||
| 533 | 0.018088 | 0.018581 | 0.021717 | 0.022744 | 0.985049 | 0.928739 | |||
| 548 | 0.013194 | 0.016949 | 0.014626 | 0.020291 | 0.598121 | 0.835766 |
68428 rows Ă 6 columns
#se agrega razon media LNE/PE
datos2019_secc = datos2019_secc.join(datos_ratio, lsuffix='_caller', rsuffix='_other')
datos2019_secc
| Coef_Var_PE | Coef_Var_LNE | Var_Prop_PE | Var_Prop_LNE | Autocorr_PE | Autocorr_LNE | Razon_LNE_PE | ||||
|---|---|---|---|---|---|---|---|---|---|---|
| EDO | DTO | MUN | SECC | |||||||
| 1 | 1 | 2 | 338 | 0.009274 | 0.014711 | 0.011123 | 0.017569 | 0.689191 | 0.581325 | 0.991580 |
| 339 | 0.006067 | 0.010667 | 0.007416 | 0.010046 | 0.717164 | 0.158740 | 0.990970 | |||
| 340 | 0.007424 | 0.013348 | 0.008319 | 0.013217 | 0.827905 | 0.152959 | 0.989805 | |||
| 341 | 0.006535 | 0.012118 | 0.007678 | 0.014344 | 0.635793 | 0.435180 | 0.987758 | |||
| 342 | 0.013673 | 0.020742 | 0.016305 | 0.024695 | 0.634410 | 0.367652 | 0.984600 | |||
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 32 | 4 | 57 | 530 | 0.013450 | 0.014736 | 0.015733 | 0.017848 | 0.955188 | 0.952950 | 0.988422 |
| 531 | 0.012040 | 0.012231 | 0.014507 | 0.014973 | 0.974564 | 0.961922 | 0.989747 | |||
| 532 | 0.006251 | 0.008482 | 0.007321 | 0.010120 | 0.934510 | 0.681005 | 0.986724 | |||
| 533 | 0.018088 | 0.018581 | 0.021717 | 0.022744 | 0.985049 | 0.928739 | 0.986229 | |||
| 548 | 0.013194 | 0.016949 | 0.014626 | 0.020291 | 0.598121 | 0.835766 | 0.990182 |
68428 rows Ă 7 columns
#se recupera el Ăndice (desagrupa)
datos2019_secc= datos2019_secc.reset_index()
datos2019_secc
| EDO | DTO | MUN | SECC | Coef_Var_PE | Coef_Var_LNE | Var_Prop_PE | Var_Prop_LNE | Autocorr_PE | Autocorr_LNE | Razon_LNE_PE | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 2 | 338 | 0.009274 | 0.014711 | 0.011123 | 0.017569 | 0.689191 | 0.581325 | 0.991580 |
| 1 | 1 | 1 | 2 | 339 | 0.006067 | 0.010667 | 0.007416 | 0.010046 | 0.717164 | 0.158740 | 0.990970 |
| 2 | 1 | 1 | 2 | 340 | 0.007424 | 0.013348 | 0.008319 | 0.013217 | 0.827905 | 0.152959 | 0.989805 |
| 3 | 1 | 1 | 2 | 341 | 0.006535 | 0.012118 | 0.007678 | 0.014344 | 0.635793 | 0.435180 | 0.987758 |
| 4 | 1 | 1 | 2 | 342 | 0.013673 | 0.020742 | 0.016305 | 0.024695 | 0.634410 | 0.367652 | 0.984600 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 68423 | 32 | 4 | 57 | 530 | 0.013450 | 0.014736 | 0.015733 | 0.017848 | 0.955188 | 0.952950 | 0.988422 |
| 68424 | 32 | 4 | 57 | 531 | 0.012040 | 0.012231 | 0.014507 | 0.014973 | 0.974564 | 0.961922 | 0.989747 |
| 68425 | 32 | 4 | 57 | 532 | 0.006251 | 0.008482 | 0.007321 | 0.010120 | 0.934510 | 0.681005 | 0.986724 |
| 68426 | 32 | 4 | 57 | 533 | 0.018088 | 0.018581 | 0.021717 | 0.022744 | 0.985049 | 0.928739 | 0.986229 |
| 68427 | 32 | 4 | 57 | 548 | 0.013194 | 0.016949 | 0.014626 | 0.020291 | 0.598121 | 0.835766 | 0.990182 |
68428 rows Ă 11 columns
#datos al corte de diciembre 2019
diciembre_2019
| EDO | DTO | MUN | SECC | PE | LNE | TC_LNE_2019 | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 2 | 338 | 1786.0 | 1770.0 | -0.011173 |
| 1 | 1 | 1 | 2 | 339 | 1724.0 | 1707.0 | 0.008269 |
| 2 | 1 | 1 | 2 | 340 | 1771.0 | 1751.0 | 0.019802 |
| 3 | 1 | 1 | 2 | 341 | 2074.0 | 2041.0 | 0.002456 |
| 4 | 1 | 1 | 2 | 342 | 1559.0 | 1520.0 | 0.004627 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 68423 | 32 | 4 | 57 | 530 | 1671.0 | 1649.0 | 0.039723 |
| 68424 | 32 | 4 | 57 | 531 | 1601.0 | 1581.0 | 0.036721 |
| 68425 | 32 | 4 | 57 | 532 | 1081.0 | 1065.0 | 0.022073 |
| 68426 | 32 | 4 | 57 | 533 | 1183.0 | 1164.0 | 0.052441 |
| 68427 | 32 | 4 | 57 | 548 | 119.0 | 117.0 | 0.026316 |
68428 rows Ă 7 columns
#se agregan los datos de PE, LNE (diciembre2019) y la tasa de crecimiento anual 2019 de la LNE
datos2019_secc_vf = pd.merge(datos2019_secc, diciembre_2019, on=['EDO','DTO','MUN','SECC'], how='inner')
datos2019_secc_vf
| EDO | DTO | MUN | SECC | Coef_Var_PE | Coef_Var_LNE | Var_Prop_PE | Var_Prop_LNE | Autocorr_PE | Autocorr_LNE | Razon_LNE_PE | PE | LNE | TC_LNE_2019 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 2 | 338 | 0.009274 | 0.014711 | 0.011123 | 0.017569 | 0.689191 | 0.581325 | 0.991580 | 1786.0 | 1770.0 | -0.011173 |
| 1 | 1 | 1 | 2 | 339 | 0.006067 | 0.010667 | 0.007416 | 0.010046 | 0.717164 | 0.158740 | 0.990970 | 1724.0 | 1707.0 | 0.008269 |
| 2 | 1 | 1 | 2 | 340 | 0.007424 | 0.013348 | 0.008319 | 0.013217 | 0.827905 | 0.152959 | 0.989805 | 1771.0 | 1751.0 | 0.019802 |
| 3 | 1 | 1 | 2 | 341 | 0.006535 | 0.012118 | 0.007678 | 0.014344 | 0.635793 | 0.435180 | 0.987758 | 2074.0 | 2041.0 | 0.002456 |
| 4 | 1 | 1 | 2 | 342 | 0.013673 | 0.020742 | 0.016305 | 0.024695 | 0.634410 | 0.367652 | 0.984600 | 1559.0 | 1520.0 | 0.004627 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 68423 | 32 | 4 | 57 | 530 | 0.013450 | 0.014736 | 0.015733 | 0.017848 | 0.955188 | 0.952950 | 0.988422 | 1671.0 | 1649.0 | 0.039723 |
| 68424 | 32 | 4 | 57 | 531 | 0.012040 | 0.012231 | 0.014507 | 0.014973 | 0.974564 | 0.961922 | 0.989747 | 1601.0 | 1581.0 | 0.036721 |
| 68425 | 32 | 4 | 57 | 532 | 0.006251 | 0.008482 | 0.007321 | 0.010120 | 0.934510 | 0.681005 | 0.986724 | 1081.0 | 1065.0 | 0.022073 |
| 68426 | 32 | 4 | 57 | 533 | 0.018088 | 0.018581 | 0.021717 | 0.022744 | 0.985049 | 0.928739 | 0.986229 | 1183.0 | 1164.0 | 0.052441 |
| 68427 | 32 | 4 | 57 | 548 | 0.013194 | 0.016949 | 0.014626 | 0.020291 | 0.598121 | 0.835766 | 0.990182 | 119.0 | 117.0 | 0.026316 |
68428 rows Ă 14 columns
#se unen los datos de area y tipo de secciĂłn
data_area_tipo = pd.merge(data_tipo_secc, data_area, on=['EDO','DTO','MUN','SECC'], how='inner')
data_area_tipo
| EDO | DTO | MUN | SECC | TIPO_SECCION | Area_Km2 | |
|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 2 | 338 | M | 0.506492 |
| 1 | 1 | 1 | 2 | 339 | M | 0.692053 |
| 2 | 1 | 1 | 2 | 340 | R | 0.990536 |
| 3 | 1 | 1 | 2 | 341 | M | 0.507441 |
| 4 | 1 | 1 | 2 | 342 | R | 1.437017 |
| ... | ... | ... | ... | ... | ... | ... |
| 68423 | 32 | 4 | 57 | 530 | M | 0.757383 |
| 68424 | 32 | 4 | 57 | 531 | U | 0.548031 |
| 68425 | 32 | 4 | 57 | 532 | U | 0.149622 |
| 68426 | 32 | 4 | 57 | 533 | U | 0.559116 |
| 68427 | 32 | 4 | 57 | 548 | R | 22.948000 |
68428 rows Ă 6 columns
#se unen los datos de area_tipo secciĂłn con los datos de feature engineering
datos2019_secc_vf2 = pd.merge(datos2019_secc_vf, data_area_tipo, on=['EDO','DTO','MUN','SECC'], how='inner')
datos2019_secc_vf2
| EDO | DTO | MUN | SECC | Coef_Var_PE | Coef_Var_LNE | Var_Prop_PE | Var_Prop_LNE | Autocorr_PE | Autocorr_LNE | Razon_LNE_PE | PE | LNE | TC_LNE_2019 | TIPO_SECCION | Area_Km2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 2 | 338 | 0.009274 | 0.014711 | 0.011123 | 0.017569 | 0.689191 | 0.581325 | 0.991580 | 1786.0 | 1770.0 | -0.011173 | M | 0.506492 |
| 1 | 1 | 1 | 2 | 339 | 0.006067 | 0.010667 | 0.007416 | 0.010046 | 0.717164 | 0.158740 | 0.990970 | 1724.0 | 1707.0 | 0.008269 | M | 0.692053 |
| 2 | 1 | 1 | 2 | 340 | 0.007424 | 0.013348 | 0.008319 | 0.013217 | 0.827905 | 0.152959 | 0.989805 | 1771.0 | 1751.0 | 0.019802 | R | 0.990536 |
| 3 | 1 | 1 | 2 | 341 | 0.006535 | 0.012118 | 0.007678 | 0.014344 | 0.635793 | 0.435180 | 0.987758 | 2074.0 | 2041.0 | 0.002456 | M | 0.507441 |
| 4 | 1 | 1 | 2 | 342 | 0.013673 | 0.020742 | 0.016305 | 0.024695 | 0.634410 | 0.367652 | 0.984600 | 1559.0 | 1520.0 | 0.004627 | R | 1.437017 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 68389 | 32 | 4 | 57 | 530 | 0.013450 | 0.014736 | 0.015733 | 0.017848 | 0.955188 | 0.952950 | 0.988422 | 1671.0 | 1649.0 | 0.039723 | M | 0.757383 |
| 68390 | 32 | 4 | 57 | 531 | 0.012040 | 0.012231 | 0.014507 | 0.014973 | 0.974564 | 0.961922 | 0.989747 | 1601.0 | 1581.0 | 0.036721 | U | 0.548031 |
| 68391 | 32 | 4 | 57 | 532 | 0.006251 | 0.008482 | 0.007321 | 0.010120 | 0.934510 | 0.681005 | 0.986724 | 1081.0 | 1065.0 | 0.022073 | U | 0.149622 |
| 68392 | 32 | 4 | 57 | 533 | 0.018088 | 0.018581 | 0.021717 | 0.022744 | 0.985049 | 0.928739 | 0.986229 | 1183.0 | 1164.0 | 0.052441 | U | 0.559116 |
| 68393 | 32 | 4 | 57 | 548 | 0.013194 | 0.016949 | 0.014626 | 0.020291 | 0.598121 | 0.835766 | 0.990182 | 119.0 | 117.0 | 0.026316 | R | 22.948000 |
68394 rows Ă 16 columns
#calculamos la densidad por seccion (al corte de diciembre 2019)
datos2019_secc_vf2['Densidad_LNE'] = datos2019_secc_vf2['LNE']/datos2019_secc_vf2['Area_Km2']
datos2019_secc_vf2
| EDO | DTO | MUN | SECC | Coef_Var_PE | Coef_Var_LNE | Var_Prop_PE | Var_Prop_LNE | Autocorr_PE | Autocorr_LNE | Razon_LNE_PE | PE | LNE | TC_LNE_2019 | TIPO_SECCION | Area_Km2 | Densidad_LNE | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 2 | 338 | 0.009274 | 0.014711 | 0.011123 | 0.017569 | 0.689191 | 0.581325 | 0.991580 | 1786.0 | 1770.0 | -0.011173 | M | 0.506492 | 3494.628518 |
| 1 | 1 | 1 | 2 | 339 | 0.006067 | 0.010667 | 0.007416 | 0.010046 | 0.717164 | 0.158740 | 0.990970 | 1724.0 | 1707.0 | 0.008269 | M | 0.692053 | 2466.573479 |
| 2 | 1 | 1 | 2 | 340 | 0.007424 | 0.013348 | 0.008319 | 0.013217 | 0.827905 | 0.152959 | 0.989805 | 1771.0 | 1751.0 | 0.019802 | R | 0.990536 | 1767.730639 |
| 3 | 1 | 1 | 2 | 341 | 0.006535 | 0.012118 | 0.007678 | 0.014344 | 0.635793 | 0.435180 | 0.987758 | 2074.0 | 2041.0 | 0.002456 | M | 0.507441 | 4022.144814 |
| 4 | 1 | 1 | 2 | 342 | 0.013673 | 0.020742 | 0.016305 | 0.024695 | 0.634410 | 0.367652 | 0.984600 | 1559.0 | 1520.0 | 0.004627 | R | 1.437017 | 1057.746932 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 68389 | 32 | 4 | 57 | 530 | 0.013450 | 0.014736 | 0.015733 | 0.017848 | 0.955188 | 0.952950 | 0.988422 | 1671.0 | 1649.0 | 0.039723 | M | 0.757383 | 2177.233490 |
| 68390 | 32 | 4 | 57 | 531 | 0.012040 | 0.012231 | 0.014507 | 0.014973 | 0.974564 | 0.961922 | 0.989747 | 1601.0 | 1581.0 | 0.036721 | U | 0.548031 | 2884.873238 |
| 68391 | 32 | 4 | 57 | 532 | 0.006251 | 0.008482 | 0.007321 | 0.010120 | 0.934510 | 0.681005 | 0.986724 | 1081.0 | 1065.0 | 0.022073 | U | 0.149622 | 7117.946288 |
| 68392 | 32 | 4 | 57 | 533 | 0.018088 | 0.018581 | 0.021717 | 0.022744 | 0.985049 | 0.928739 | 0.986229 | 1183.0 | 1164.0 | 0.052441 | U | 0.559116 | 2081.857887 |
| 68393 | 32 | 4 | 57 | 548 | 0.013194 | 0.016949 | 0.014626 | 0.020291 | 0.598121 | 0.835766 | 0.990182 | 119.0 | 117.0 | 0.026316 | R | 22.948000 | 5.098484 |
68394 rows Ă 17 columns
#se combinan las nomenclaturas de estados y municipios
nomenclatura=pd.merge(data_nom_edo,data_nom_mun, on=['EDO'], how='inner')
nomenclatura
| EDO | NOMBRE_ESTADO | MUN | NOMBRE_MUNICIPIO | |
|---|---|---|---|---|
| 0 | 1 | AGUASCALIENTES | 10 | SAN FRANCISCO DE LOS ROMO |
| 1 | 1 | AGUASCALIENTES | 8 | SAN JOSE DE GRACIA |
| 2 | 1 | AGUASCALIENTES | 9 | TEPEZALA |
| 3 | 1 | AGUASCALIENTES | 11 | EL LLANO |
| 4 | 1 | AGUASCALIENTES | 2 | ASIENTOS |
| ... | ... | ... | ... | ... |
| 2453 | 32 | ZACATECAS | 45 | TEPECHITLAN |
| 2454 | 32 | ZACATECAS | 14 | GENERAL FRANCISCO R. MURGUIA |
| 2455 | 32 | ZACATECAS | 44 | TABASCO |
| 2456 | 32 | ZACATECAS | 9 | CHALCHIHUITES |
| 2457 | 32 | ZACATECAS | 11 | TRINIDAD GARCIA DE LA CADENA |
2458 rows Ă 4 columns
#se concatenan los nombre de los estados y municipios con los resultados:
datos_procesados=pd.merge(nomenclatura, datos2019_secc_vf2, on=['EDO','MUN'], how='inner')
datos_procesados
| EDO | NOMBRE_ESTADO | MUN | NOMBRE_MUNICIPIO | DTO | SECC | Coef_Var_PE | Coef_Var_LNE | Var_Prop_PE | Var_Prop_LNE | Autocorr_PE | Autocorr_LNE | Razon_LNE_PE | PE | LNE | TC_LNE_2019 | TIPO_SECCION | Area_Km2 | Densidad_LNE | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | AGUASCALIENTES | 10 | SAN FRANCISCO DE LOS ROMO | 1 | 404 | 0.010392 | 0.013701 | 0.011281 | 0.015822 | 0.547527 | 0.494467 | 0.992982 | 804.0 | 796.0 | -0.008717 | R | 0.255615 | 3114.052233 |
| 1 | 1 | AGUASCALIENTES | 10 | SAN FRANCISCO DE LOS ROMO | 1 | 470 | 0.009898 | 0.015790 | 0.011420 | 0.018778 | 0.606021 | 0.459351 | 0.986038 | 1340.0 | 1311.0 | -0.005311 | M | 0.497880 | 2633.165586 |
| 2 | 1 | AGUASCALIENTES | 10 | SAN FRANCISCO DE LOS ROMO | 1 | 471 | 0.006650 | 0.011982 | 0.008169 | 0.014270 | 0.707951 | 0.512860 | 0.989025 | 1608.0 | 1582.0 | -0.006905 | U | 0.438174 | 3610.437533 |
| 3 | 1 | AGUASCALIENTES | 10 | SAN FRANCISCO DE LOS ROMO | 1 | 472 | 0.010460 | 0.014940 | 0.012687 | 0.018002 | 0.795567 | 0.617178 | 0.990770 | 1414.0 | 1394.0 | -0.014841 | U | 0.404232 | 3448.510713 |
| 4 | 1 | AGUASCALIENTES | 10 | SAN FRANCISCO DE LOS ROMO | 1 | 473 | 0.011303 | 0.011558 | 0.013614 | 0.013143 | 0.893573 | 0.439638 | 0.990124 | 8526.0 | 8412.0 | 0.027859 | M | 2.190191 | 3840.761507 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 68389 | 32 | ZACATECAS | 11 | TRINIDAD GARCIA DE LA CADENA | 2 | 352 | 0.022106 | 0.027715 | 0.025957 | 0.032934 | 0.949893 | 0.919404 | 0.973718 | 407.0 | 395.0 | 0.076294 | U | 0.216138 | 1827.533786 |
| 68390 | 32 | ZACATECAS | 11 | TRINIDAD GARCIA DE LA CADENA | 2 | 353 | 0.027591 | 0.035514 | 0.031829 | 0.041410 | 0.937516 | 0.939714 | 0.972815 | 382.0 | 374.0 | 0.119760 | M | 0.432270 | 865.200585 |
| 68391 | 32 | ZACATECAS | 11 | TRINIDAD GARCIA DE LA CADENA | 2 | 354 | 0.049990 | 0.069392 | 0.056654 | 0.077790 | 0.970217 | 0.942633 | 0.973603 | 103.0 | 101.0 | 0.262500 | R | 58.018000 | 1.740839 |
| 68392 | 32 | ZACATECAS | 11 | TRINIDAD GARCIA DE LA CADENA | 2 | 356 | 0.033608 | 0.047956 | 0.039083 | 0.055342 | 0.947350 | 0.977154 | 0.971920 | 367.0 | 360.0 | 0.165049 | R | 2.157981 | 166.822568 |
| 68393 | 32 | ZACATECAS | 11 | TRINIDAD GARCIA DE LA CADENA | 2 | 358 | 0.060953 | 0.086876 | 0.064359 | 0.093083 | 0.912792 | 0.935511 | 0.974554 | 57.0 | 57.0 | 0.266667 | R | 113.055000 | 0.504179 |
68394 rows Ă 19 columns
#con pandas_profiling se crea una interface para visualizar un resumen del dataframe de los resultados
import pandas_profiling
datos_procesados.profile_report()